


<!DOCTYPE html>
<!--[if IE 8]><html class="no-js lt-ie9" lang="en" > <![endif]-->
<!--[if gt IE 8]><!--> <html class="no-js" lang="en" > <!--<![endif]-->
<head>
  <meta charset="utf-8">
  <meta name="generator" content="Docutils 0.17.1: http://docutils.sourceforge.net/" />

  <meta name="viewport" content="width=device-width, initial-scale=1.0">

  <title>Torch-TensorRT Getting Started - EfficientNet-B0 &mdash; Torch-TensorRT v1.3.0 documentation</title>















  <link rel="stylesheet" href="../_static/css/theme.css" type="text/css" />
  <!-- <link rel="stylesheet" href="../_static/pygments.css" type="text/css" /> -->
  <link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
  <link rel="stylesheet" href="../_static/css/theme.css" type="text/css" />
  <link rel="stylesheet" href="../_static/collapsible-lists/css/tree_view.css" type="text/css" />
    <link rel="index" title="Index" href="../genindex.html" />
    <link rel="search" title="Search" href="../search.html" />
  <!-- Google Analytics -->

  <!-- End Google Analytics -->



  <script src="../_static/js/modernizr.min.js"></script>

  <!-- Preload the theme fonts -->

<link rel="preload" href="../_static/fonts/FreightSans/freight-sans-book.woff2" as="font" type="font/woff2" crossorigin="anonymous">
<link rel="preload" href="../_static/fonts/FreightSans/freight-sans-medium.woff2" as="font" type="font/woff2" crossorigin="anonymous">
<link rel="preload" href="../_static/fonts/IBMPlexMono/IBMPlexMono-Medium.woff2" as="font" type="font/woff2" crossorigin="anonymous">
<link rel="preload" href="../_static/fonts/FreightSans/freight-sans-bold.woff2" as="font" type="font/woff2" crossorigin="anonymous">
<link rel="preload" href="../_static/fonts/FreightSans/freight-sans-medium-italic.woff2" as="font" type="font/woff2" crossorigin="anonymous">
<link rel="preload" href="../_static/fonts/IBMPlexMono/IBMPlexMono-SemiBold.woff2" as="font" type="font/woff2" crossorigin="anonymous">

<!-- Preload the katex fonts -->

<link rel="preload" href="https://cdn.jsdelivr.net/npm/katex@0.10.0/dist/fonts/KaTeX_Math-Italic.woff2" as="font" type="font/woff2" crossorigin="anonymous">
<link rel="preload" href="https://cdn.jsdelivr.net/npm/katex@0.10.0/dist/fonts/KaTeX_Main-Regular.woff2" as="font" type="font/woff2" crossorigin="anonymous">
<link rel="preload" href="https://cdn.jsdelivr.net/npm/katex@0.10.0/dist/fonts/KaTeX_Main-Bold.woff2" as="font" type="font/woff2" crossorigin="anonymous">
<link rel="preload" href="https://cdn.jsdelivr.net/npm/katex@0.10.0/dist/fonts/KaTeX_Size1-Regular.woff2" as="font" type="font/woff2" crossorigin="anonymous">
<link rel="preload" href="https://cdn.jsdelivr.net/npm/katex@0.10.0/dist/fonts/KaTeX_Size4-Regular.woff2" as="font" type="font/woff2" crossorigin="anonymous">
<link rel="preload" href="https://cdn.jsdelivr.net/npm/katex@0.10.0/dist/fonts/KaTeX_Size2-Regular.woff2" as="font" type="font/woff2" crossorigin="anonymous">
<link rel="preload" href="https://cdn.jsdelivr.net/npm/katex@0.10.0/dist/fonts/KaTeX_Size3-Regular.woff2" as="font" type="font/woff2" crossorigin="anonymous">
<link rel="preload" href="https://cdn.jsdelivr.net/npm/katex@0.10.0/dist/fonts/KaTeX_Caligraphic-Regular.woff2" as="font" type="font/woff2" crossorigin="anonymous">
  <link rel="stylesheet" href="https://use.fontawesome.com/releases/v5.15.2/css/all.css" integrity="sha384-vSIIfh2YWi9wW0r9iZe7RJPrKwp6bG+s9QZMoITbCckVJqGCCRhc+ccxNcdpHuYu" crossorigin="anonymous">
</head>

<div class="container-fluid header-holder tutorials-header" id="header-holder">
  <div class="container">
    <div class="header-container">
      <a class="header-logo" href="https://pytorch.org/" aria-label="PyTorch"></a>

      <div class="main-menu">
        <ul>
          <li>
            <a href="https://pytorch.org/get-started">Get Started</a>
          </li>

          <li>
            <a href="https://pytorch.org/ecosystem">Ecosystem</a>
          </li>

          <li>
            <a href="https://pytorch.org/mobile">Mobile</a>
          </li>

          <li>
            <a href="https://pytorch.org/blog/">Blog</a>
          </li>

          <li>
            <a href="https://pytorch.org/tutorials">Tutorials</a>
          </li>

          <li>
            <div id="resourcesDropdownButton" data-toggle="resources-dropdown" class="resources-dropdown">
              <a class="resource-option with-down-orange-arrow">
                Docs
              </a>
              <div class="resources-dropdown-menu">
                <a class="doc-dropdown-option nav-dropdown-item" href="https://pytorch.org/docs/stable/index.html">
                  <span class="dropdown-title">PyTorch</span>
                  <p></p>
                </a>
                <a class="doc-dropdown-option nav-dropdown-item" href="https://pytorch.org/audio/stable/index.html">
                  <span class="dropdown-title">torchaudio</span>
                  <p></p>
                </a>
                <a class="doc-dropdown-option nav-dropdown-item" href="https://pytorch.org/text/stable/index.html">
                  <span class="dropdown-title">torchtext</span>
                  <p></p>
                </a>
                <a class="doc-dropdown-option nav-dropdown-item" href="https://pytorch.org/vision/stable/index.html">
                  <span class="dropdown-title">torchvision</span>
                  <p></p>
                </a>
                <a class="doc-dropdown-option nav-dropdown-item" href="https://pytorch.org/torcharrow">
                  <span class="dropdown-title">torcharrow</span>
                  <p></p>
                </a>
                <a class="doc-dropdown-option nav-dropdown-item" href="https://pytorch.org/data">
                  <span class="dropdown-title">TorchData</span>
                  <p></p>
                </a>
                <a class="doc-dropdown-option nav-dropdown-item" href="https://pytorch.org/torchrec">
                  <span class="dropdown-title">TorchRec</span>
                  <p></p>
                </a>
                <a class="doc-dropdown-option nav-dropdown-item" href="https://pytorch.org/serve/">
                  <span class="dropdown-title">TorchServe</span>
                  <p></p>
                </a>
                <a class="doc-dropdown-option nav-dropdown-item" href="https://pytorch.org/torchx/">
                  <span class="dropdown-title">TorchX</span>
                  <p></p>
                </a>
                <a class="doc-dropdown-option nav-dropdown-item" href="https://pytorch.org/xla">
                  <span class="dropdown-title">PyTorch on XLA Devices</span>
                  <p></p>
                </a>
            </div>
          </li>

          <li>
            <div id="resourcesDropdownButton" data-toggle="resources-dropdown" class="resources-dropdown">
              <a class="resource-option with-down-arrow">
                Resources
              </a>
              <div class="resources-dropdown-menu">
                <a class="nav-dropdown-item" href="https://pytorch.org/features">
                  <span class="dropdown-title">About</span>
                  <p>Learn about PyTorch’s features and capabilities</p>
                </a>
                <a class="nav-dropdown-item" href="https://pytorch.org/foundation">
                  <span class="dropdown-title">PyTorch Foundation</span>
                  <p>Learn about the PyTorch foundation</p>
                </a>
                <a class="nav-dropdown-item" href="https://pytorch.org/#community-module">
                  <span class="dropdown-title">Community</span>
                  <p>Join the PyTorch developer community to contribute, learn, and get your questions answered.</p>
                </a>
                <a class="nav-dropdown-item" href="https://pytorch.org/community-stories">
                  <span class="dropdown-title">Community Stories</span>
                  <p>Learn how our community solves real, everyday machine learning problems with PyTorch.</p>
                </a>
                <a class="nav-dropdown-item" href="https://pytorch.org/resources">
                  <span class="dropdown-title">Developer Resources</span>
                  <p>Find resources and get questions answered</p>
                </a>
                <a class="nav-dropdown-item" href="https://pytorch.org/events">
                  <span class="dropdown-title">Events</span>
                  <p>Find events, webinars, and podcasts</p>
                </a>
                <a class="nav-dropdown-item" href="https://discuss.pytorch.org/" target="_blank">
                  <span class="dropdown-title">Forums</span>
                  <p>A place to discuss PyTorch code, issues, install, research</p>
                </a>
                <a class="nav-dropdown-item" href="https://pytorch.org/hub">
                  <span class="dropdown-title">Models (Beta)</span>
                  <p>Discover, publish, and reuse pre-trained models</p>
                </a>
              </div>
            </div>
          </li>

          <li>
            <a href="https://github.com/pytorch/pytorch">GitHub</a>
          </li>
        </ul>
      </div>

      <a class="main-menu-open-button" href="#" data-behavior="open-mobile-menu"></a>
    </div>
  </div>
</div>

<body class="pytorch-body">





    <div class="table-of-contents-link-wrapper">
      <span>Table of Contents</span>
      <a href="#" class="toggle-table-of-contents" data-behavior="toggle-table-of-contents"></a>
    </div>

    <nav data-toggle="wy-nav-shift" class="pytorch-left-menu" id="pytorch-left-menu">
      <div class="pytorch-side-scroll">
        <div class="pytorch-menu pytorch-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">
          <div class="pytorch-left-menu-search">





                <div class="version">
                  v1.3.0
                </div>









<div role="search">
  <form id="rtd-search-form" class="wy-form" action="../search.html" method="get">
    <input type="text" name="q" placeholder="Search Docs" />
    <input type="hidden" name="check_keywords" value="yes" />
    <input type="hidden" name="area" value="default" />
  </form>
</div>


          </div>







              <p class="caption" role="heading"><span class="caption-text">Getting Started</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../getting_started/installation.html">Installation</a></li>
<li class="toctree-l1"><a class="reference internal" href="../getting_started/getting_started_with_python_api.html">Using Torch-TensorRT in Python</a></li>
<li class="toctree-l1"><a class="reference internal" href="../getting_started/getting_started_with_cpp_api.html">Using Torch-TensorRT in  C++</a></li>
</ul>
<p class="caption" role="heading"><span class="caption-text">Tutorials</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../tutorials/creating_torchscript_module_in_python.html">Creating a TorchScript Module</a></li>
<li class="toctree-l1"><a class="reference internal" href="../tutorials/creating_torchscript_module_in_python.html#working-with-torchscript-in-python">Working with TorchScript in Python</a></li>
<li class="toctree-l1"><a class="reference internal" href="../tutorials/creating_torchscript_module_in_python.html#saving-torchscript-module-to-disk">Saving TorchScript Module to Disk</a></li>
<li class="toctree-l1"><a class="reference internal" href="../tutorials/getting_started_with_fx_path.html">Torch-TensorRT (FX Frontend) User Guide</a></li>
<li class="toctree-l1"><a class="reference internal" href="../tutorials/ptq.html">Post Training Quantization (PTQ)</a></li>
<li class="toctree-l1"><a class="reference internal" href="../tutorials/runtime.html">Deploying Torch-TensorRT Programs</a></li>
<li class="toctree-l1"><a class="reference internal" href="../tutorials/serving_torch_tensorrt_with_triton.html">Serving a Torch-TensorRT model with Triton</a></li>
<li class="toctree-l1"><a class="reference internal" href="../tutorials/use_from_pytorch.html">Using Torch-TensorRT Directly From PyTorch</a></li>
<li class="toctree-l1"><a class="reference internal" href="../tutorials/using_dla.html">DLA</a></li>
<li class="toctree-l1"><a class="reference internal" href="../tutorials/notebooks.html">Example notebooks</a></li>
</ul>
<p class="caption" role="heading"><span class="caption-text">Python API Documenation</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../py_api/torch_tensorrt.html">torch_tensorrt</a></li>
<li class="toctree-l1"><a class="reference internal" href="../py_api/logging.html">torch_tensorrt.logging</a></li>
<li class="toctree-l1"><a class="reference internal" href="../py_api/ptq.html">torch_tensorrt.ptq</a></li>
<li class="toctree-l1"><a class="reference internal" href="../py_api/ts.html">torch_tensorrt.ts</a></li>
<li class="toctree-l1"><a class="reference internal" href="../py_api/fx.html">torch_tensorrt.fx</a></li>
</ul>
<p class="caption" role="heading"><span class="caption-text">C++ API Documenation</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../_cpp_api/torch_tensort_cpp.html">Torch-TensorRT C++ API</a></li>
<li class="toctree-l1"><a class="reference internal" href="../_cpp_api/namespace_torch_tensorrt.html">Namespace torch_tensorrt</a></li>
<li class="toctree-l1"><a class="reference internal" href="../_cpp_api/namespace_torch_tensorrt__logging.html">Namespace torch_tensorrt::logging</a></li>
<li class="toctree-l1"><a class="reference internal" href="../_cpp_api/namespace_torch_tensorrt__torchscript.html">Namespace torch_tensorrt::torchscript</a></li>
<li class="toctree-l1"><a class="reference internal" href="../_cpp_api/namespace_torch_tensorrt__ptq.html">Namespace torch_tensorrt::ptq</a></li>
</ul>
<p class="caption" role="heading"><span class="caption-text">CLI Documenation</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../cli/torchtrtc.html">torchtrtc</a></li>
</ul>
<p class="caption" role="heading"><span class="caption-text">Contributor Documentation</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../contributors/system_overview.html">System Overview</a></li>
<li class="toctree-l1"><a class="reference internal" href="../contributors/writing_converters.html">Writing Converters</a></li>
<li class="toctree-l1"><a class="reference internal" href="../contributors/useful_links.html">Useful Links for Torch-TensorRT Development</a></li>
</ul>
<p class="caption" role="heading"><span class="caption-text">Indices</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../indices/supported_ops.html">Operators Supported</a></li>
</ul>



        </div>
      </div>
    </nav>

    <div class="pytorch-container">
      <div class="pytorch-page-level-bar" id="pytorch-page-level-bar">
        <div class="pytorch-breadcrumbs-wrapper">
















<div role="navigation" aria-label="breadcrumbs navigation">

  <ul class="pytorch-breadcrumbs">

      <li>
        <a href="../index.html">

            Docs

        </a> &gt;
      </li>


      <li>Torch-TensorRT Getting Started - EfficientNet-B0</li>


      <li class="pytorch-breadcrumbs-aside">


            <a href="../_sources/_notebooks/EfficientNet-example.ipynb.txt" rel="nofollow"><img src="../_static/images/view-page-source-icon.svg"></a>


      </li>

  </ul>


</div>
        </div>

        <div class="pytorch-shortcuts-wrapper" id="pytorch-shortcuts-wrapper">
          Shortcuts
        </div>
      </div>

      <section data-toggle="wy-nav-shift" id="pytorch-content-wrap" class="pytorch-content-wrap">
        <div class="pytorch-content-left">



          <div class="rst-content">

            <div role="main" class="main-content" itemscope="itemscope" itemtype="http://schema.org/Article">
             <article itemprop="articleBody" id="pytorch-article" class="pytorch-article">


<style>
/* CSS for nbsphinx extension */

/* remove conflicting styling from Sphinx themes */
div.nbinput.container div.prompt *,
div.nboutput.container div.prompt *,
div.nbinput.container div.input_area pre,
div.nboutput.container div.output_area pre,
div.nbinput.container div.input_area .highlight,
div.nboutput.container div.output_area .highlight {
    border: none;
    padding: 0;
    margin: 0;
    box-shadow: none;
}

div.nbinput.container > div[class*=highlight],
div.nboutput.container > div[class*=highlight] {
    margin: 0;
}

div.nbinput.container div.prompt *,
div.nboutput.container div.prompt * {
    background: none;
}

div.nboutput.container div.output_area .highlight,
div.nboutput.container div.output_area pre {
    background: unset;
}

div.nboutput.container div.output_area div.highlight {
    color: unset;  /* override Pygments text color */
}

/* avoid gaps between output lines */
div.nboutput.container div[class*=highlight] pre {
    line-height: normal;
}

/* input/output containers */
div.nbinput.container,
div.nboutput.container {
    display: -webkit-flex;
    display: flex;
    align-items: flex-start;
    margin: 0;
    width: 100%;
}
@media (max-width: 540px) {
    div.nbinput.container,
    div.nboutput.container {
        flex-direction: column;
    }
}

/* input container */
div.nbinput.container {
    padding-top: 5px;
}

/* last container */
div.nblast.container {
    padding-bottom: 5px;
}

/* input prompt */
div.nbinput.container div.prompt pre {
    color: #307FC1;
}

/* output prompt */
div.nboutput.container div.prompt pre {
    color: #BF5B3D;
}

/* all prompts */
div.nbinput.container div.prompt,
div.nboutput.container div.prompt {
    width: 4.5ex;
    padding-top: 5px;
    position: relative;
    user-select: none;
}

div.nbinput.container div.prompt > div,
div.nboutput.container div.prompt > div {
    position: absolute;
    right: 0;
    margin-right: 0.3ex;
}

@media (max-width: 540px) {
    div.nbinput.container div.prompt,
    div.nboutput.container div.prompt {
        width: unset;
        text-align: left;
        padding: 0.4em;
    }
    div.nboutput.container div.prompt.empty {
        padding: 0;
    }

    div.nbinput.container div.prompt > div,
    div.nboutput.container div.prompt > div {
        position: unset;
    }
}

/* disable scrollbars on prompts */
div.nbinput.container div.prompt pre,
div.nboutput.container div.prompt pre {
    overflow: hidden;
}

/* input/output area */
div.nbinput.container div.input_area,
div.nboutput.container div.output_area {
    -webkit-flex: 1;
    flex: 1;
    overflow: auto;
}
@media (max-width: 540px) {
    div.nbinput.container div.input_area,
    div.nboutput.container div.output_area {
        width: 100%;
    }
}

/* input area */
div.nbinput.container div.input_area {
    border: 1px solid #e0e0e0;
    border-radius: 2px;
    /*background: #f5f5f5;*/
}

/* override MathJax center alignment in output cells */
div.nboutput.container div[class*=MathJax] {
    text-align: left !important;
}

/* override sphinx.ext.imgmath center alignment in output cells */
div.nboutput.container div.math p {
    text-align: left;
}

/* standard error */
div.nboutput.container div.output_area.stderr {
    background: #fdd;
}

/* ANSI colors */
.ansi-black-fg { color: #3E424D; }
.ansi-black-bg { background-color: #3E424D; }
.ansi-black-intense-fg { color: #282C36; }
.ansi-black-intense-bg { background-color: #282C36; }
.ansi-red-fg { color: #E75C58; }
.ansi-red-bg { background-color: #E75C58; }
.ansi-red-intense-fg { color: #B22B31; }
.ansi-red-intense-bg { background-color: #B22B31; }
.ansi-green-fg { color: #00A250; }
.ansi-green-bg { background-color: #00A250; }
.ansi-green-intense-fg { color: #007427; }
.ansi-green-intense-bg { background-color: #007427; }
.ansi-yellow-fg { color: #DDB62B; }
.ansi-yellow-bg { background-color: #DDB62B; }
.ansi-yellow-intense-fg { color: #B27D12; }
.ansi-yellow-intense-bg { background-color: #B27D12; }
.ansi-blue-fg { color: #208FFB; }
.ansi-blue-bg { background-color: #208FFB; }
.ansi-blue-intense-fg { color: #0065CA; }
.ansi-blue-intense-bg { background-color: #0065CA; }
.ansi-magenta-fg { color: #D160C4; }
.ansi-magenta-bg { background-color: #D160C4; }
.ansi-magenta-intense-fg { color: #A03196; }
.ansi-magenta-intense-bg { background-color: #A03196; }
.ansi-cyan-fg { color: #60C6C8; }
.ansi-cyan-bg { background-color: #60C6C8; }
.ansi-cyan-intense-fg { color: #258F8F; }
.ansi-cyan-intense-bg { background-color: #258F8F; }
.ansi-white-fg { color: #C5C1B4; }
.ansi-white-bg { background-color: #C5C1B4; }
.ansi-white-intense-fg { color: #A1A6B2; }
.ansi-white-intense-bg { background-color: #A1A6B2; }

.ansi-default-inverse-fg { color: #FFFFFF; }
.ansi-default-inverse-bg { background-color: #000000; }

.ansi-bold { font-weight: bold; }
.ansi-underline { text-decoration: underline; }


div.nbinput.container div.input_area div[class*=highlight] > pre,
div.nboutput.container div.output_area div[class*=highlight] > pre,
div.nboutput.container div.output_area div[class*=highlight].math,
div.nboutput.container div.output_area.rendered_html,
div.nboutput.container div.output_area > div.output_javascript,
div.nboutput.container div.output_area:not(.rendered_html) > img{
    padding: 5px;
    margin: 0;
}

/* fix copybtn overflow problem in chromium (needed for 'sphinx_copybutton') */
div.nbinput.container div.input_area > div[class^='highlight'],
div.nboutput.container div.output_area > div[class^='highlight']{
    overflow-y: hidden;
}

/* hide copybtn icon on prompts (needed for 'sphinx_copybutton') */
.prompt .copybtn {
    display: none;
}

/* Some additional styling taken form the Jupyter notebook CSS */
div.rendered_html table {
  border: none;
  border-collapse: collapse;
  border-spacing: 0;
  color: black;
  font-size: 12px;
  table-layout: fixed;
}
div.rendered_html thead {
  border-bottom: 1px solid black;
  vertical-align: bottom;
}
div.rendered_html tr,
div.rendered_html th,
div.rendered_html td {
  text-align: right;
  vertical-align: middle;
  padding: 0.5em 0.5em;
  line-height: normal;
  white-space: normal;
  max-width: none;
  border: none;
}
div.rendered_html th {
  font-weight: bold;
}
div.rendered_html tbody tr:nth-child(odd) {
  background: #f5f5f5;
}
div.rendered_html tbody tr:hover {
  background: rgba(66, 165, 245, 0.2);
}
</style>
<div class="nbinput nblast docutils container">
<div class="prompt highlight-none notranslate"><div class="highlight"><pre><span></span>[1]:
</pre></div>
</div>
<div class="input_area highlight-ipython3 notranslate"><div class="highlight"><pre><span></span># Copyright 2019 NVIDIA Corporation. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the &quot;License&quot;);
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an &quot;AS IS&quot; BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
</pre></div>
</div>
</div>
<p><img alt="0d3b5ad697c6475f968fdb3988a8493a" src="http://developer.download.nvidia.com/compute/machine-learning/frameworks/nvidia_logo.png" /></p>
<section id="Torch-TensorRT-Getting-Started---EfficientNet-B0">
<h1>Torch-TensorRT Getting Started - EfficientNet-B0<a class="headerlink" href="#Torch-TensorRT-Getting-Started---EfficientNet-B0" title="Permalink to this headline">¶</a></h1>
<section id="Overview">
<h2>Overview<a class="headerlink" href="#Overview" title="Permalink to this headline">¶</a></h2>
<p>In the practice of developing machine learning models, there are few tools as approachable as PyTorch for developing and experimenting in designing machine learning models. The power of PyTorch comes from its deep integration into Python, its flexibility and its approach to automatic differentiation and execution (eager execution). However, when moving from research into production, the requirements change and we may no longer want that deep Python integration and we want optimization to get the
best performance we can on our deployment platform. In PyTorch 1.0, TorchScript was introduced as a method to separate your PyTorch model from Python, make it portable and optimizable. TorchScript uses PyTorch’s JIT compiler to transform your normal PyTorch code which gets interpreted by the Python interpreter to an intermediate representation (IR) which can have optimizations run on it and at runtime can get interpreted by the PyTorch JIT interpreter. For PyTorch this has opened up a whole new
world of possibilities, including deployment in other languages like C++. It also introduces a structured graph based format that we can use to do down to the kernel level optimization of models for inference.</p>
<p>When deploying on NVIDIA GPUs TensorRT, NVIDIA’s Deep Learning Optimization SDK and Runtime is able to take models from any major framework and specifically tune them to perform better on specific target hardware in the NVIDIA family be it an A100, TITAN V, Jetson Xavier or NVIDIA’s Deep Learning Accelerator. TensorRT performs a couple sets of optimizations to achieve this. TensorRT fuses layers and tensors in the model graph, it then uses a large kernel library to select implementations that
perform best on the target GPU. TensorRT also has strong support for reduced operating precision execution which allows users to leverage the Tensor Cores on Volta and newer GPUs as well as reducing memory and computation footprints on device.</p>
<p>Torch-TensorRT is a compiler that uses TensorRT to optimize TorchScript code, compiling standard TorchScript modules into ones that internally run with TensorRT optimizations. This enables you to continue to remain in the PyTorch ecosystem, using all the great features PyTorch has such as module composability, its flexible tensor implementation, data loaders and more. Torch-TensorRT is available to use with both PyTorch and LibTorch.</p>
<section id="Learning-objectives">
<h3>Learning objectives<a class="headerlink" href="#Learning-objectives" title="Permalink to this headline">¶</a></h3>
<p>This notebook demonstrates the steps for compiling a TorchScript module with Torch-TensorRT on a pretrained EfficientNet network, and running it to test the speedup obtained.</p>
</section>
</section>
<section id="Content">
<h2>Content<a class="headerlink" href="#Content" title="Permalink to this headline">¶</a></h2>
<ol class="arabic simple">
<li><p><a class="reference external" href="#1">Requirements</a></p></li>
<li><p><a class="reference external" href="#2">EfficientNet Overview</a></p></li>
<li><p><a class="reference external" href="#3">Running the model without optimizations</a></p></li>
<li><p><a class="reference external" href="#4">Accelerating with Torch-TensorRT</a></p></li>
<li><p><a class="reference external" href="#5">Conclusion</a></p></li>
</ol>
<div class="nbinput docutils container">
<div class="prompt highlight-none notranslate"><div class="highlight"><pre><span></span>[1]:
</pre></div>
</div>
<div class="input_area highlight-ipython3 notranslate"><div class="highlight"><pre><span></span>!pip install timm==0.4.12
!nvidia-smi
</pre></div>
</div>
</div>
<div class="nboutput nblast docutils container">
<div class="prompt empty docutils container">
</div>
<div class="output_area docutils container">
<div class="highlight"><pre>
Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com
Collecting timm==0.4.12
  Downloading timm-0.4.12-py3-none-any.whl (376 kB)
     |████████████████████████████████| 376 kB 11.9 MB/s eta 0:00:01
Requirement already satisfied: torch&gt;=1.4 in /opt/conda/lib/python3.8/site-packages (from timm==0.4.12) (1.11.0a0+bfe5ad2)
Requirement already satisfied: torchvision in /opt/conda/lib/python3.8/site-packages (from timm==0.4.12) (0.12.0a0)
Requirement already satisfied: typing_extensions in /opt/conda/lib/python3.8/site-packages (from torch&gt;=1.4-&gt;timm==0.4.12) (4.0.1)
Requirement already satisfied: pillow!=8.3.*,&gt;=5.3.0 in /opt/conda/lib/python3.8/site-packages (from torchvision-&gt;timm==0.4.12) (8.2.0)
Requirement already satisfied: numpy in /opt/conda/lib/python3.8/site-packages (from torchvision-&gt;timm==0.4.12) (1.22.0)
Installing collected packages: timm
Successfully installed timm-0.4.12
<span class="ansi-yellow-fg">WARNING: Running pip as the &#39;root&#39; user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv</span>
Fri Feb  4 21:29:36 2022
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 510.39.01    Driver Version: 510.39.01    CUDA Version: 11.6     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|===============================+======================+======================|
|   0  NVIDIA GeForce ...  On   | 00000000:65:00.0 Off |                  N/A |
| 30%   28C    P8    11W / 350W |      0MiB / 24576MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+

+-----------------------------------------------------------------------------+
| Processes:                                                                  |
|  GPU   GI   CI        PID   Type   Process name                  GPU Memory |
|        ID   ID                                                   Usage      |
|=============================================================================|
|  No running processes found                                                 |
+-----------------------------------------------------------------------------+
</pre></div></div>
</div>
<p>## 1. Requirements</p>
<p>NVIDIA’s NGC provides PyTorch Docker Container which contains PyTorch and Torch-TensorRT. We can make use of <a class="reference external" href="https://catalog.ngc.nvidia.com/orgs/nvidia/containers/pytorch">latest pytorch</a> container to run this notebook.</p>
<p>Otherwise, you can follow the steps in <code class="docutils literal notranslate"><span class="pre">notebooks/README</span></code> to prepare a Docker container yourself, within which you can run this demo notebook.</p>
<p>## 2. EfficientNet Overview</p>
<p>PyTorch has a model repository called <code class="docutils literal notranslate"><span class="pre">timm</span></code>, which is a source for high quality implementations of computer vision models. We can get our EfficientNet model from there pretrained on ImageNet.</p>
<section id="Model-Description">
<h3>Model Description<a class="headerlink" href="#Model-Description" title="Permalink to this headline">¶</a></h3>
<p>This model is based on the <a class="reference external" href="https://arxiv.org/abs/1905.11946">EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks</a> paper.</p>
<p><img alt="alt" class="no-scaled-link" src="https://1.bp.blogspot.com/-Cdtb97FtgdA/XO3BHsB7oEI/AAAAAAAAEKE/bmtkonwgs8cmWyI5esVo8wJPnhPLQ5bGQCLcBGAs/s1600/image4.png" style="width: 100%;" /></p>
<p>## 3. Running the model without optimizations</p>
<p>PyTorch has a model repository called <code class="docutils literal notranslate"><span class="pre">timm</span></code>, which is a source for high quality implementations of computer vision models. We can get our EfficientNet model from there pretrained on ImageNet.</p>
<div class="nbinput nblast docutils container">
<div class="prompt highlight-none notranslate"><div class="highlight"><pre><span></span>[4]:
</pre></div>
</div>
<div class="input_area highlight-ipython3 notranslate"><div class="highlight"><pre><span></span>import torch
import torch_tensorrt
import timm
import time
import numpy as np
import torch.backends.cudnn as cudnn
from timm.data import resolve_data_config
from timm.data.transforms_factory import create_transform
import json

efficientnet_b0_model = timm.create_model(&#39;efficientnet_b0&#39;,pretrained=True)
model = efficientnet_b0_model.eval().to(&quot;cuda&quot;)
</pre></div>
</div>
</div>
<p>With our model loaded, let’s proceed to downloading some images!</p>
<div class="nbinput docutils container">
<div class="prompt highlight-none notranslate"><div class="highlight"><pre><span></span>[5]:
</pre></div>
</div>
<div class="input_area highlight-ipython3 notranslate"><div class="highlight"><pre><span></span>!mkdir -p ./data
!wget  -O ./data/img0.JPG &quot;https://d17fnq9dkz9hgj.cloudfront.net/breed-uploads/2018/08/siberian-husky-detail.jpg?bust=1535566590&amp;width=630&quot;
!wget  -O ./data/img1.JPG &quot;https://www.hakaimagazine.com/wp-content/uploads/header-gulf-birds.jpg&quot;
!wget  -O ./data/img2.JPG &quot;https://www.artis.nl/media/filer_public_thumbnails/filer_public/00/f1/00f1b6db-fbed-4fef-9ab0-84e944ff11f8/chimpansee_amber_r_1920x1080.jpg__1920x1080_q85_subject_location-923%2C365_subsampling-2.jpg&quot;
!wget  -O ./data/img3.JPG &quot;https://www.familyhandyman.com/wp-content/uploads/2018/09/How-to-Avoid-Snakes-Slithering-Up-Your-Toilet-shutterstock_780480850.jpg&quot;

!wget  -O ./data/imagenet_class_index.json &quot;https://s3.amazonaws.com/deep-learning-models/image-models/imagenet_class_index.json&quot;
</pre></div>
</div>
</div>
<div class="nboutput nblast docutils container">
<div class="prompt empty docutils container">
</div>
<div class="output_area docutils container">
<div class="highlight"><pre>
--2022-02-04 21:30:07--  https://d17fnq9dkz9hgj.cloudfront.net/breed-uploads/2018/08/siberian-husky-detail.jpg?bust=1535566590&amp;width=630
Resolving d17fnq9dkz9hgj.cloudfront.net (d17fnq9dkz9hgj.cloudfront.net)... 18.65.227.127, 18.65.227.37, 18.65.227.99, ...
Connecting to d17fnq9dkz9hgj.cloudfront.net (d17fnq9dkz9hgj.cloudfront.net)|18.65.227.127|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 24112 (24K) [image/jpeg]
Saving to: ‘./data/img0.JPG’

./data/img0.JPG     100%[===================&gt;]  23.55K  --.-KB/s    in 0.004s

2022-02-04 21:30:07 (6.40 MB/s) - ‘./data/img0.JPG’ saved [24112/24112]

--2022-02-04 21:30:07--  https://www.hakaimagazine.com/wp-content/uploads/header-gulf-birds.jpg
Resolving www.hakaimagazine.com (www.hakaimagazine.com)... 164.92.73.117
Connecting to www.hakaimagazine.com (www.hakaimagazine.com)|164.92.73.117|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 452718 (442K) [image/jpeg]
Saving to: ‘./data/img1.JPG’

./data/img1.JPG     100%[===================&gt;] 442.11K  --.-KB/s    in 0.06s

2022-02-04 21:30:07 (6.83 MB/s) - ‘./data/img1.JPG’ saved [452718/452718]

--2022-02-04 21:30:08--  https://www.artis.nl/media/filer_public_thumbnails/filer_public/00/f1/00f1b6db-fbed-4fef-9ab0-84e944ff11f8/chimpansee_amber_r_1920x1080.jpg__1920x1080_q85_subject_location-923%2C365_subsampling-2.jpg
Resolving www.artis.nl (www.artis.nl)... 94.75.225.20
Connecting to www.artis.nl (www.artis.nl)|94.75.225.20|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 361413 (353K) [image/jpeg]
Saving to: ‘./data/img2.JPG’

./data/img2.JPG     100%[===================&gt;] 352.94K   246KB/s    in 1.4s

2022-02-04 21:30:10 (246 KB/s) - ‘./data/img2.JPG’ saved [361413/361413]

--2022-02-04 21:30:10--  https://www.familyhandyman.com/wp-content/uploads/2018/09/How-to-Avoid-Snakes-Slithering-Up-Your-Toilet-shutterstock_780480850.jpg
Resolving www.familyhandyman.com (www.familyhandyman.com)... 104.18.202.107, 104.18.201.107, 2606:4700::6812:ca6b, ...
Connecting to www.familyhandyman.com (www.familyhandyman.com)|104.18.202.107|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 90994 (89K) [image/jpeg]
Saving to: ‘./data/img3.JPG’

./data/img3.JPG     100%[===================&gt;]  88.86K  --.-KB/s    in 0.006s

2022-02-04 21:30:10 (14.4 MB/s) - ‘./data/img3.JPG’ saved [90994/90994]

--2022-02-04 21:30:11--  https://s3.amazonaws.com/deep-learning-models/image-models/imagenet_class_index.json
Resolving s3.amazonaws.com (s3.amazonaws.com)... 52.216.133.45
Connecting to s3.amazonaws.com (s3.amazonaws.com)|52.216.133.45|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 35363 (35K) [application/octet-stream]
Saving to: ‘./data/imagenet_class_index.json’

./data/imagenet_cla 100%[===================&gt;]  34.53K  --.-KB/s    in 0.07s

2022-02-04 21:30:11 (474 KB/s) - ‘./data/imagenet_class_index.json’ saved [35363/35363]

</pre></div></div>
</div>
<p>All pre-trained models expect input images normalized in the same way, i.e. mini-batches of 3-channel RGB images of shape <code class="docutils literal notranslate"><span class="pre">(3</span> <span class="pre">x</span> <span class="pre">H</span> <span class="pre">x</span> <span class="pre">W)</span></code>, where <code class="docutils literal notranslate"><span class="pre">H</span></code> and <code class="docutils literal notranslate"><span class="pre">W</span></code> are expected to be at least <code class="docutils literal notranslate"><span class="pre">224</span></code>. The images have to be loaded in to a range of <code class="docutils literal notranslate"><span class="pre">[0,</span> <span class="pre">1]</span></code> and then normalized using <code class="docutils literal notranslate"><span class="pre">mean</span> <span class="pre">=</span> <span class="pre">[0.485,</span> <span class="pre">0.456,</span> <span class="pre">0.406]</span></code> and <code class="docutils literal notranslate"><span class="pre">std</span> <span class="pre">=</span> <span class="pre">[0.229,</span> <span class="pre">0.224,</span> <span class="pre">0.225]</span></code>.</p>
<p>Here’s a sample execution.</p>
<div class="nbinput docutils container">
<div class="prompt highlight-none notranslate"><div class="highlight"><pre><span></span>[6]:
</pre></div>
</div>
<div class="input_area highlight-ipython3 notranslate"><div class="highlight"><pre><span></span>from PIL import Image
from torchvision import transforms
import matplotlib.pyplot as plt

fig, axes = plt.subplots(nrows=2, ncols=2)

for i in range(4):
    img_path = &#39;./data/img%d.JPG&#39;%i
    img = Image.open(img_path)
    preprocess = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])
    input_tensor = preprocess(img)
    plt.subplot(2,2,i+1)
    plt.imshow(img)
    plt.axis(&#39;off&#39;)

# loading labels
with open(&quot;./data/imagenet_class_index.json&quot;) as json_file:
    d = json.load(json_file)
</pre></div>
</div>
</div>
<div class="nboutput nblast docutils container">
<div class="prompt empty docutils container">
</div>
<div class="output_area docutils container">
<img alt="../_images/_notebooks_EfficientNet-example_12_0.png" src="../_images/_notebooks_EfficientNet-example_12_0.png" />
</div>
</div>
<p>Throughout this tutorial, we will be making use of some utility functions; <code class="docutils literal notranslate"><span class="pre">efficientnet_preprocess</span></code> for preprocessing input images, <code class="docutils literal notranslate"><span class="pre">predict</span></code> to use the model for prediction and <code class="docutils literal notranslate"><span class="pre">benchmark</span></code> to benchmark the inference. You do not need to understand/go through these utilities to make use of Torch TensorRT, but are welecomed to do so if you choose.</p>
<div class="nbinput nblast docutils container">
<div class="prompt highlight-none notranslate"><div class="highlight"><pre><span></span>[7]:
</pre></div>
</div>
<div class="input_area highlight-ipython3 notranslate"><div class="highlight"><pre><span></span>cudnn.benchmark = True

def efficientnet_preprocess():
    config = resolve_data_config({}, model=model)
    transform = create_transform(**config)
    return transform

# decode the results into ([predicted class, description], probability)
def predict(img_path, model):
    img = Image.open(img_path)
    preprocess = efficientnet_preprocess()
    input_tensor = preprocess(img)
    input_batch = input_tensor.unsqueeze(0) # create a mini-batch as expected by the model

    # move the input and model to GPU for speed if available
    if torch.cuda.is_available():
        input_batch = input_batch.to(&#39;cuda&#39;)
        model.to(&#39;cuda&#39;)

    with torch.no_grad():
        output = model(input_batch)
        # Tensor of shape 1000, with confidence scores over Imagenet&#39;s 1000 classes
        sm_output = torch.nn.functional.softmax(output[0], dim=0)

    ind = torch.argmax(sm_output)
    return d[str(ind.item())], sm_output[ind] #([predicted class, description], probability)

def benchmark(model, input_shape=(1024, 1, 224, 224), dtype=&#39;fp32&#39;, nwarmup=50, nruns=10000):
    input_data = torch.randn(input_shape)
    input_data = input_data.to(&quot;cuda&quot;)
    if dtype==&#39;fp16&#39;:
        input_data = input_data.half()

    print(&quot;Warm up ...&quot;)
    with torch.no_grad():
        for _ in range(nwarmup):
            features = model(input_data)
    torch.cuda.synchronize()
    print(&quot;Start timing ...&quot;)
    timings = []
    with torch.no_grad():
        for i in range(1, nruns+1):
            start_time = time.time()
            features = model(input_data)
            torch.cuda.synchronize()
            end_time = time.time()
            timings.append(end_time - start_time)
            if i%10==0:
                print(&#39;Iteration %d/%d, avg batch time %.2f ms&#39;%(i, nruns, np.mean(timings)*1000))

    print(&quot;Input shape:&quot;, input_data.size())
    print(&quot;Output features size:&quot;, features.size())
    print(&#39;Average throughput: %.2f images/second&#39;%(input_shape[0]/np.mean(timings)))
</pre></div>
</div>
</div>
<p>With the model downloaded and the util functions written, let’s just quickly see some predictions, and benchmark the model in its current un-optimized state.</p>
<div class="nbinput docutils container">
<div class="prompt highlight-none notranslate"><div class="highlight"><pre><span></span>[8]:
</pre></div>
</div>
<div class="input_area highlight-ipython3 notranslate"><div class="highlight"><pre><span></span>for i in range(4):
    img_path = &#39;./data/img%d.JPG&#39;%i
    img = Image.open(img_path)

    pred, prob = predict(img_path, efficientnet_b0_model)
    print(&#39;{} - Predicted: {}, Probablility: {}&#39;.format(img_path, pred, prob))

    plt.subplot(2,2,i+1)
    plt.imshow(img);
    plt.axis(&#39;off&#39;);
    plt.title(pred[1])
</pre></div>
</div>
</div>
<div class="nboutput docutils container">
<div class="prompt empty docutils container">
</div>
<div class="output_area stderr docutils container">
<div class="highlight"><pre>
/opt/conda/lib/python3.8/site-packages/torchvision/transforms/transforms.py:321: UserWarning: Argument interpolation should be of type InterpolationMode instead of int. Please, use InterpolationMode enum.
  warnings.warn(
</pre></div></div>
</div>
<div class="nboutput docutils container">
<div class="prompt empty docutils container">
</div>
<div class="output_area docutils container">
<div class="highlight"><pre>
./data/img0.JPG - Predicted: [&#39;n02109961&#39;, &#39;Eskimo_dog&#39;], Probablility: 0.3987298309803009
./data/img1.JPG - Predicted: [&#39;n01537544&#39;, &#39;indigo_bunting&#39;], Probablility: 0.23344755172729492
./data/img2.JPG - Predicted: [&#39;n02481823&#39;, &#39;chimpanzee&#39;], Probablility: 0.9695423245429993
./data/img3.JPG - Predicted: [&#39;n01739381&#39;, &#39;vine_snake&#39;], Probablility: 0.227739155292511
</pre></div></div>
</div>
<div class="nboutput nblast docutils container">
<div class="prompt empty docutils container">
</div>
<div class="output_area docutils container">
<img alt="../_images/_notebooks_EfficientNet-example_16_2.png" src="../_images/_notebooks_EfficientNet-example_16_2.png" />
</div>
</div>
<div class="nbinput docutils container">
<div class="prompt highlight-none notranslate"><div class="highlight"><pre><span></span>[9]:
</pre></div>
</div>
<div class="input_area highlight-ipython3 notranslate"><div class="highlight"><pre><span></span># Model benchmark without Torch-TensorRT
benchmark(model, input_shape=(128, 3, 224, 224), nruns=100)
</pre></div>
</div>
</div>
<div class="nboutput nblast docutils container">
<div class="prompt empty docutils container">
</div>
<div class="output_area docutils container">
<div class="highlight"><pre>
Warm up ...
Start timing ...
Iteration 10/100, avg batch time 37.62 ms
Iteration 20/100, avg batch time 37.66 ms
Iteration 30/100, avg batch time 37.65 ms
Iteration 40/100, avg batch time 37.66 ms
Iteration 50/100, avg batch time 37.70 ms
Iteration 60/100, avg batch time 37.70 ms
Iteration 70/100, avg batch time 37.70 ms
Iteration 80/100, avg batch time 37.71 ms
Iteration 90/100, avg batch time 37.72 ms
Iteration 100/100, avg batch time 37.72 ms
Input shape: torch.Size([128, 3, 224, 224])
Output features size: torch.Size([128, 1000])
Average throughput: 3393.46 images/second
</pre></div></div>
</div>
<p>## 4. Accelerating with Torch-TensorRT</p>
<p>Onwards to the next step, accelerating with Torch TensorRT. In these examples we showcase the results for FP32 (single precision) and FP16 (half precision). We do not demonstrat specific tuning, just showcase the simplicity of usage. If you want to learn more about the possible customizations, visit our <a class="reference external" href="https://nvidia.github.io/Torch-TensorRT/">documentation</a>.</p>
</section>
<section id="FP32-(single-precision)">
<h3>FP32 (single precision)<a class="headerlink" href="#FP32-(single-precision)" title="Permalink to this headline">¶</a></h3>
<div class="nbinput docutils container">
<div class="prompt highlight-none notranslate"><div class="highlight"><pre><span></span>[11]:
</pre></div>
</div>
<div class="input_area highlight-ipython3 notranslate"><div class="highlight"><pre><span></span># The compiled module will have precision as specified by &quot;op_precision&quot;.
# Here, it will have FP32 precision.
trt_model_fp32 = torch_tensorrt.compile(model, inputs = [torch_tensorrt.Input((128, 3, 224, 224), dtype=torch.float32)],
    enabled_precisions = torch.float32, # Run with FP32
    workspace_size = 1 &lt;&lt; 22
)
</pre></div>
</div>
</div>
<div class="nboutput nblast docutils container">
<div class="prompt empty docutils container">
</div>
<div class="output_area stderr docutils container">
<div class="highlight"><pre>
WARNING: [Torch-TensorRT] - Mean converter disregards dtype
WARNING: [Torch-TensorRT] - Mean converter disregards dtype
WARNING: [Torch-TensorRT] - Mean converter disregards dtype
WARNING: [Torch-TensorRT] - Mean converter disregards dtype
WARNING: [Torch-TensorRT] - Mean converter disregards dtype
WARNING: [Torch-TensorRT] - Mean converter disregards dtype
WARNING: [Torch-TensorRT] - Mean converter disregards dtype
WARNING: [Torch-TensorRT] - Mean converter disregards dtype
WARNING: [Torch-TensorRT] - Mean converter disregards dtype
WARNING: [Torch-TensorRT] - Mean converter disregards dtype
WARNING: [Torch-TensorRT] - Mean converter disregards dtype
WARNING: [Torch-TensorRT] - Mean converter disregards dtype
WARNING: [Torch-TensorRT] - Mean converter disregards dtype
WARNING: [Torch-TensorRT] - Mean converter disregards dtype
WARNING: [Torch-TensorRT] - Mean converter disregards dtype
WARNING: [Torch-TensorRT] - Mean converter disregards dtype
</pre></div></div>
</div>
<div class="nbinput docutils container">
<div class="prompt highlight-none notranslate"><div class="highlight"><pre><span></span>[12]:
</pre></div>
</div>
<div class="input_area highlight-ipython3 notranslate"><div class="highlight"><pre><span></span># Obtain the average time taken by a batch of input
benchmark(trt_model_fp32, input_shape=(128, 3, 224, 224), nruns=100)
</pre></div>
</div>
</div>
<div class="nboutput nblast docutils container">
<div class="prompt empty docutils container">
</div>
<div class="output_area docutils container">
<div class="highlight"><pre>
Warm up ...
Start timing ...
Iteration 10/100, avg batch time 27.86 ms
Iteration 20/100, avg batch time 27.71 ms
Iteration 30/100, avg batch time 27.99 ms
Iteration 40/100, avg batch time 27.95 ms
Iteration 50/100, avg batch time 27.89 ms
Iteration 60/100, avg batch time 27.85 ms
Iteration 70/100, avg batch time 28.00 ms
Iteration 80/100, avg batch time 27.97 ms
Iteration 90/100, avg batch time 27.95 ms
Iteration 100/100, avg batch time 27.92 ms
Input shape: torch.Size([128, 3, 224, 224])
Output features size: torch.Size([128, 1000])
Average throughput: 4584.06 images/second
</pre></div></div>
</div>
</section>
<section id="FP16-(half-precision)">
<h3>FP16 (half precision)<a class="headerlink" href="#FP16-(half-precision)" title="Permalink to this headline">¶</a></h3>
<div class="nbinput docutils container">
<div class="prompt highlight-none notranslate"><div class="highlight"><pre><span></span>[17]:
</pre></div>
</div>
<div class="input_area highlight-ipython3 notranslate"><div class="highlight"><pre><span></span># The compiled module will have precision as specified by &quot;op_precision&quot;.
# Here, it will have FP16 precision.
trt_model_fp16 = torch_tensorrt.compile(model, inputs = [torch_tensorrt.Input((128, 3, 224, 224), dtype=torch.half)],
    enabled_precisions = {torch.half}, # Run with FP32
    workspace_size = 1 &lt;&lt; 22
)
</pre></div>
</div>
</div>
<div class="nboutput nblast docutils container">
<div class="prompt empty docutils container">
</div>
<div class="output_area stderr docutils container">
<div class="highlight"><pre>
WARNING: [Torch-TensorRT] - For input x.1, found user specified input dtype as Float16, however when inspecting the graph, the input type expected was inferred to be Float
The compiler is going to use the user setting Float16
This conflict may cause an error at runtime due to partial compilation being enabled and therefore
compatibility with PyTorch&#39;s data type convention is required.
If you do indeed see errors at runtime either:
- Remove the dtype spec for x.1
- Disable partial compilation by setting require_full_compilation to True
WARNING: [Torch-TensorRT] - Mean converter disregards dtype
WARNING: [Torch-TensorRT] - Mean converter disregards dtype
WARNING: [Torch-TensorRT] - Mean converter disregards dtype
WARNING: [Torch-TensorRT] - Mean converter disregards dtype
WARNING: [Torch-TensorRT] - Mean converter disregards dtype
WARNING: [Torch-TensorRT] - Mean converter disregards dtype
WARNING: [Torch-TensorRT] - Mean converter disregards dtype
WARNING: [Torch-TensorRT] - Mean converter disregards dtype
WARNING: [Torch-TensorRT] - Mean converter disregards dtype
WARNING: [Torch-TensorRT] - Mean converter disregards dtype
WARNING: [Torch-TensorRT] - Mean converter disregards dtype
WARNING: [Torch-TensorRT] - Mean converter disregards dtype
WARNING: [Torch-TensorRT] - Mean converter disregards dtype
WARNING: [Torch-TensorRT] - Mean converter disregards dtype
WARNING: [Torch-TensorRT] - Mean converter disregards dtype
WARNING: [Torch-TensorRT] - Mean converter disregards dtype
WARNING: [Torch-TensorRT TorchScript Conversion Context] - Tensor DataType is determined at build time for tensors not marked as input or output.
WARNING: [Torch-TensorRT TorchScript Conversion Context] - Tensor DataType is determined at build time for tensors not marked as input or output.
</pre></div></div>
</div>
<div class="nbinput docutils container">
<div class="prompt highlight-none notranslate"><div class="highlight"><pre><span></span>[18]:
</pre></div>
</div>
<div class="input_area highlight-ipython3 notranslate"><div class="highlight"><pre><span></span># Obtain the average time taken by a batch of input
benchmark(trt_model_fp16, input_shape=(128, 3, 224, 224), dtype=&#39;fp16&#39;, nruns=100)
</pre></div>
</div>
</div>
<div class="nboutput nblast docutils container">
<div class="prompt empty docutils container">
</div>
<div class="output_area docutils container">
<div class="highlight"><pre>
Warm up ...
Start timing ...
Iteration 10/100, avg batch time 12.05 ms
Iteration 20/100, avg batch time 12.56 ms
Iteration 30/100, avg batch time 12.39 ms
Iteration 40/100, avg batch time 12.34 ms
Iteration 50/100, avg batch time 12.33 ms
Iteration 60/100, avg batch time 12.32 ms
Iteration 70/100, avg batch time 12.30 ms
Iteration 80/100, avg batch time 12.28 ms
Iteration 90/100, avg batch time 12.35 ms
Iteration 100/100, avg batch time 12.35 ms
Input shape: torch.Size([128, 3, 224, 224])
Output features size: torch.Size([128, 1000])
Average throughput: 10362.23 images/second
</pre></div></div>
</div>
<p>## 5. Conclusion</p>
<p>In this notebook, we have walked through the complete process of compiling TorchScript models with Torch-TensorRT for EfficientNet-B0 model and test the performance impact of the optimization. With Torch-TensorRT, we observe a speedup of <strong>1.35x</strong> with FP32, and <strong>3.13x</strong> with FP16 on an NVIDIA 3090 GPU. These acceleration numbers will vary from GPU to GPU(as well as implementation to implementation based on the ops used) and we encorage you to try out latest generation of Data center compute
cards for maximum acceleration.</p>
</section>
<section id="What’s-next">
<h3>What’s next<a class="headerlink" href="#What’s-next" title="Permalink to this headline">¶</a></h3>
<p>Now it’s time to try Torch-TensorRT on your own model. If you run into any issues, you can fill them at <a class="reference external" href="https://github.com/NVIDIA/Torch-TensorRT">https://github.com/NVIDIA/Torch-TensorRT</a>. Your involvement will help future development of Torch-TensorRT.</p>
<div class="nbinput nblast docutils container">
<div class="prompt highlight-none notranslate"><div class="highlight"><pre><span></span>[ ]:
</pre></div>
</div>
<div class="input_area highlight-ipython3 notranslate"><div class="highlight"><pre><span></span>
</pre></div>
</div>
</div>
</section>
</section>
</section>


             </article>

            </div>
            <footer>




    <hr>



  <div role="contentinfo">
    <p>
        &copy; Copyright 2022, NVIDIA Corporation.

    </p>
  </div>

      <div>
        Built with <a href="http://sphinx-doc.org/">Sphinx</a> using a <a href="https://github.com/rtfd/sphinx_rtd_theme">theme</a> provided by <a href="https://readthedocs.org">Read the Docs</a>.
      </div>


</footer>

          </div>
        </div>

        <div class="pytorch-content-right" id="pytorch-content-right">
          <div class="pytorch-right-menu" id="pytorch-right-menu">
            <div class="pytorch-side-scroll" id="pytorch-side-scroll-right">
              <ul>
<li><a class="reference internal" href="#">Torch-TensorRT Getting Started - EfficientNet-B0</a><ul>
<li><a class="reference internal" href="#Overview">Overview</a><ul>
<li><a class="reference internal" href="#Learning-objectives">Learning objectives</a></li>
</ul>
</li>
<li><a class="reference internal" href="#Content">Content</a><ul>
<li><a class="reference internal" href="#Model-Description">Model Description</a></li>
<li><a class="reference internal" href="#FP32-(single-precision)">FP32 (single precision)</a></li>
<li><a class="reference internal" href="#FP16-(half-precision)">FP16 (half precision)</a></li>
<li><a class="reference internal" href="#What’s-next">What’s next</a></li>
</ul>
</li>
</ul>
</li>
</ul>

            </div>
          </div>
        </div>
      </section>
    </div>







       <script type="text/javascript" id="documentation_options" data-url_root="../" src="../_static/documentation_options.js"></script>
         <script data-url_root="../" id="documentation_options" src="../_static/documentation_options.js"></script>
         <script src="../_static/jquery.js"></script>
         <script src="../_static/underscore.js"></script>
         <script src="../_static/doctools.js"></script>
         <script src="../_static/collapsible-lists/js/CollapsibleLists.compressed.js"></script>
         <script src="../_static/collapsible-lists/js/apply-collapsible-lists.js"></script>
         <script crossorigin="anonymous" integrity="sha256-Ae2Vz/4ePdIu6ZyI/5ZGsYnb+m0JlOmKPjt6XZ9JJkA=" src="https://cdnjs.cloudflare.com/ajax/libs/require.js/2.3.4/require.min.js"></script>
         <script>window.MathJax = {"tex": {"inlineMath": [["$", "$"], ["\\(", "\\)"]], "processEscapes": true}, "options": {"ignoreHtmlClass": "tex2jax_ignore|mathjax_ignore|document", "processHtmlClass": "tex2jax_process|mathjax_process|math|output_area"}}</script>
         <script defer="defer" src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>




  <script type="text/javascript" src="../_static/js/vendor/popper.min.js"></script>
  <script type="text/javascript" src="../_static/js/vendor/bootstrap.min.js"></script>
  <script src="https://cdnjs.cloudflare.com/ajax/libs/list.js/1.5.0/list.min.js"></script>
  <script type="text/javascript" src="../_static/js/theme.js"></script>

  <script type="text/javascript">
      jQuery(function () {
          SphinxRtdTheme.Navigation.enable(true);
      });
  </script>

  <!-- Begin Footer -->

  <div class="container-fluid docs-tutorials-resources" id="docs-tutorials-resources">
    <div class="container">
      <div class="row">
        <div class="col-md-4 text-center">
          <h2>Docs</h2>
          <p>Access comprehensive developer documentation for PyTorch</p>
          <a class="with-right-arrow" href="https://pytorch.org/docs/stable/index.html">View Docs</a>
        </div>

        <div class="col-md-4 text-center">
          <h2>Tutorials</h2>
          <p>Get in-depth tutorials for beginners and advanced developers</p>
          <a class="with-right-arrow" href="https://pytorch.org/tutorials">View Tutorials</a>
        </div>

        <div class="col-md-4 text-center">
          <h2>Resources</h2>
          <p>Find development resources and get your questions answered</p>
          <a class="with-right-arrow" href="https://pytorch.org/resources">View Resources</a>
        </div>
      </div>
    </div>
  </div>

  <footer class="site-footer">
    <div class="container footer-container">
      <div class="footer-logo-wrapper">
        <a href="https://pytorch.org/" class="footer-logo"></a>
      </div>

      <div class="footer-links-wrapper">
        <div class="footer-links-col">
          <ul>
            <li class="list-title"><a href="https://pytorch.org/">PyTorch</a></li>
            <li><a href="https://pytorch.org/get-started">Get Started</a></li>
            <li><a href="https://pytorch.org/features">Features</a></li>
            <li><a href="https://pytorch.org/ecosystem">Ecosystem</a></li>
            <li><a href="https://pytorch.org/blog/">Blog</a></li>
            <li><a href="https://github.com/pytorch/pytorch/blob/master/CONTRIBUTING.md">Contributing</a></li>
          </ul>
        </div>

        <div class="footer-links-col">
          <ul>
            <li class="list-title"><a href="https://pytorch.org/resources">Resources</a></li>
            <li><a href="https://pytorch.org/tutorials">Tutorials</a></li>
            <li><a href="https://pytorch.org/docs/stable/index.html">Docs</a></li>
            <li><a href="https://discuss.pytorch.org" target="_blank">Discuss</a></li>
            <li><a href="https://github.com/pytorch/pytorch/issues" target="_blank">Github Issues</a></li>
            <li><a href="https://pytorch.org/assets/brand-guidelines/PyTorch-Brand-Guidelines.pdf" target="_blank">Brand Guidelines</a></li>
          </ul>
        </div>

        <div class="footer-links-col">
          <ul>
            <li class="list-title">Stay up to date</li>
            <li><a href="https://www.facebook.com/pytorch" target="_blank">Facebook</a></li>
            <li><a href="https://twitter.com/pytorch" target="_blank">Twitter</a></li>
            <li><a href="https://www.youtube.com/pytorch" target="_blank">YouTube</a></li>
            <li><a href="https://www.linkedin.com/company/pytorch" target="_blank">LinkedIn</a></li>
          </ul>
          </div>

        <div class="footer-links-col">
          <ul>
            <li class="list-title">PyTorch Podcasts</li>
            <li><a href="https://open.spotify.com/show/6UzHKeiy368jKfQMKKvJY5" target="_blank">Spotify</a></li>
            <li><a href="https://podcasts.apple.com/us/podcast/pytorch-developer-podcast/id1566080008" target="_blank">Apple</a></li>
            <li><a href="https://www.google.com/podcasts?feed=aHR0cHM6Ly9mZWVkcy5zaW1wbGVjYXN0LmNvbS9PQjVGa0lsOA%3D%3D" target="_blank">Google</a></li>
            <li><a href="https://music.amazon.com/podcasts/7a4e6f0e-26c2-49e9-a478-41bd244197d0/PyTorch-Developer-Podcast?" target="_blank">Amazon</a></li>
          </ul>
         </div>
        </div>

        <div class="privacy-policy">
          <ul>
            <li class="privacy-policy-links"><a href="https://www.linuxfoundation.org/terms/" target="_blank">Terms</a></li>
            <li class="privacy-policy-links">|</li>
            <li class="privacy-policy-links"><a href="https://www.linuxfoundation.org/privacy-policy/" target="_blank">Privacy</a></li>
          </ul>
        </div>
        <div class="copyright">
        <p>© Copyright The Linux Foundation. The PyTorch Foundation is a project of The Linux Foundation.
          For web site terms of use, trademark policy and other policies applicable to The PyTorch Foundation please see
          <a href="www.linuxfoundation.org/policies/">www.linuxfoundation.org/policies/</a>. The PyTorch Foundation supports the PyTorch open source
          project, which has been established as PyTorch Project a Series of LF Projects, LLC. For policies applicable to the PyTorch Project a Series of LF Projects, LLC,
          please see <a href="www.lfprojects.org/policies/">www.lfprojects.org/policies/</a>.</p>
      </div>
     </div>

  </footer>

  <div class="cookie-banner-wrapper">
  <div class="container">
    <p class="gdpr-notice">To analyze traffic and optimize your experience, we serve cookies on this site. By clicking or navigating, you agree to allow our usage of cookies. As the current maintainers of this site, Facebook’s Cookies Policy applies. Learn more, including about available controls: <a href="https://www.facebook.com/policies/cookies/">Cookies Policy</a>.</p>
    <img class="close-button" src="../_static/images/pytorch-x.svg">
  </div>
</div>

  <!-- End Footer -->

  <!-- Begin Mobile Menu -->

  <div class="mobile-main-menu">
    <div class="container-fluid">
      <div class="container">
        <div class="mobile-main-menu-header-container">
          <a class="header-logo" href="https://pytorch.org/" aria-label="PyTorch"></a>
          <a class="main-menu-close-button" href="#" data-behavior="close-mobile-menu"></a>
        </div>
      </div>
    </div>

    <div class="mobile-main-menu-links-container">
      <div class="main-menu">
        <ul>
          <li>
            <a href="https://pytorch.org/get-started">Get Started</a>
          </li>

          <li>
            <a href="https://pytorch.org/ecosystem">Ecosystem</a>
          </li>

          <li>
            <a href="https://pytorch.org/mobile">Mobile</a>
          </li>

          <li>
            <a href="https://pytorch.org/blog/">Blog</a>
          </li>

          <li>
            <a href="https://pytorch.org/tutorials">Tutorials</a>
          </li>

          <li class="resources-mobile-menu-title">
            Docs
          </li>

          <ul class="resources-mobile-menu-items">
            <li>
              <a href="https://pytorch.org/docs/stable/index.html">PyTorch</a>
            </li>

            <li>
              <a href="https://pytorch.org/audio/stable/index.html">torchaudio</a>
            </li>

            <li>
              <a href="https://pytorch.org/text/stable/index.html">torchtext</a>
            </li>

            <li>
              <a href="https://pytorch.org/vision/stable/index.html">torchvision</a>
            </li>

            <li>
              <a href="https://pytorch.org/torcharrow">torcharrow</a>
            </li>

            <li>
              <a href="https://pytorch.org/data">TorchData</a>
            </li>

            <li>
              <a href="https://pytorch.org/torchrec">TorchRec</a>
            </li>

            <li>
              <a href="https://pytorch.org/serve/">TorchServe</a>
            </li>

            <li>
              <a href="https://pytorch.org/torchx/">TorchX</a>
            </li>

            <li>
              <a href="https://pytorch.org/xla">PyTorch on XLA Devices</a>
            </li>
          </ul>

          <li class="resources-mobile-menu-title">
            Resources
          </li>

           <ul class="resources-mobile-menu-items">

            <li>
              <a href="https://pytorch.org/features">About</a>
            </li>

            <li>
              <a href="https://pytorch.org/foundation">PyTorch Foundation</a>
            </li>

            <li>
              <a href="https://pytorch.org/#community-module">Community</a>
            </li>

            <li>
              <a href="https://pytorch.org/community-stories">Community Stories</a>
            </li>

            <li>
              <a href="https://pytorch.org/resources">Developer Resources</a>
            </li>

            <li>
              <a href="https://pytorch.org/events">Events</a>
            </li>

            <li>
              <a href="https://discuss.pytorch.org/">Forums</a>
            </li>

            <li>
              <a href="https://pytorch.org/hub">Models (Beta)</a>
            </li>
          </ul>

          <li>
            <a href="https://github.com/pytorch/pytorch">Github</a>
          </li>
        </ul>
      </div>
    </div>
  </div>

  <!-- End Mobile Menu -->

  <script type="text/javascript" src="../_static/js/vendor/anchor.min.js"></script>

  <script type="text/javascript">
    $(document).ready(function() {
      mobileMenu.bind();
      mobileTOC.bind();
      pytorchAnchors.bind();
      sideMenus.bind();
      scrollToAnchor.bind();
      highlightNavigation.bind();
      mainMenuDropdown.bind();
      filterTags.bind();

      // Add class to links that have code blocks, since we cannot create links in code blocks
      $("article.pytorch-article a span.pre").each(function(e) {
        $(this).closest("a").addClass("has-code");
      });
    })
  </script>
</body>
</html>